import os
import json
import traceback
import re
def count_files(folder_path, output_file_name='candidate.jsonl'):
    """Count the ``.json`` files in a folder and filter each indexed one.

    An existing ``output_file_name`` inside ``folder_path`` is deleted first,
    since ``filter_candidate`` appends to that file. Every regular file whose
    name ends in ``.json`` is counted; those for which ``match_file`` yields
    an index are additionally handed to ``filter_candidate``.

    Args:
        folder_path: Directory to scan (not recursive).
        output_file_name: Name of the aggregated output file, located inside
            ``folder_path``.

    Returns:
        The number of regular ``.json`` files found in ``folder_path``.
    """
    file_count = 0
    # os.listdir returns names in arbitrary order; sort them so the records
    # appended to the output file come out in a reproducible order.
    file_list = sorted(os.listdir(folder_path))

    # Drop the output of a previous run so results are not appended twice.
    test_path = os.path.join(folder_path, output_file_name)
    if os.path.exists(test_path):
        print(f"remove {output_file_name}")
        os.remove(test_path)

    for file_name in file_list:
        file_path = os.path.join(folder_path, file_name)
        if not (os.path.isfile(file_path) and file_name.endswith(".json")):
            continue
        file_count += 1
        file_idx = match_file(file_name)
        if file_idx:
            print(f'processing {file_idx}')
            filter_candidate(
                idx=file_idx,
                file_path=file_path,
                output_file_dir=folder_path,
                output_file_name=output_file_name,
            )
    print(f"{folder_path}:", file_count)
    return file_count

def match_file(file_name):
    """Return the leading integer index of a file name, or None.

    The name must begin with an optionally negative run of digits that is
    immediately followed by an underscore (e.g. ``"12_foo.json"`` gives
    ``"12"``). The index is returned as a string without the underscore.
    """
    prefix = re.match(r'\-?\d+\_', file_name)
    if prefix is None:
        return None
    # The matched text ends with the underscore; cut that last character off.
    return prefix.group()[:-1]

def filter_candidate(idx, file_path="./test.json", output_file_dir=".", output_file_name="test_filterd.jsonl"):
    """Append a reduced view of one JSON file's candidates to a JSONL file.

    The input file must contain a JSON object with a ``"compare_candidates"``
    key holding a list of candidate groups, each group being a list of
    entries. From every entry only ``Elo``, ``depth``, ``node_type`` (stored
    as ``type``), ``description`` and the optional ``observation`` are kept.
    Entries lacking a required key, or that are not mappings, are reported on
    stdout and skipped. Groups left without any entry are dropped.

    If at least one group survives, all surviving groups are written as a
    single JSON array on one line appended to the output file. The output
    file is opened in append mode (and therefore created) even when nothing
    is written.

    Args:
        idx: Identifier stored under ``"idx"`` in every emitted group.
        file_path: Path of the JSON file to read.
        output_file_dir: Directory containing the output file.
        output_file_name: Name of the JSONL file to append to.

    Raises:
        OSError: If the input or output file cannot be opened.
        json.JSONDecodeError: If the input file is not valid JSON.
        KeyError: If the top-level ``"compare_candidates"`` key is missing.
    """
    # Load everything up front so the input handle is released before the
    # output file is touched.
    with open(file_path, "r", encoding="utf-8") as fr:
        candidate_list = json.load(fr)

    candidate_list_filtered = []
    for candidate in candidate_list["compare_candidates"]:
        candidate_filtered = []
        for single in candidate:
            try:
                candidate_filtered.append({
                    "Elo": single['Elo'],
                    "depth": single['depth'],
                    "type": single['node_type'],
                    "description": single['description'],
                    "observation": single.get('observation'),
                })
            except (KeyError, TypeError) as e:
                # Best effort: a malformed entry (missing key, or not a
                # mapping) is logged and skipped rather than aborting the
                # file. Interrupts and SystemExit are deliberately not caught.
                print(traceback.format_exc(), e)
        if candidate_filtered:
            candidate_list_filtered.append({
                'idx': idx,
                'content': candidate_filtered,
            })

    with open(os.path.join(output_file_dir, output_file_name), "a", encoding="utf-8") as fw:
        if candidate_list_filtered:
            json.dump(candidate_list_filtered, fw)
            fw.write('\n')
def run(data_dir, input_dir, output_file_name="candidate.jsonl"):
    """Process the folder ``data_dir/input_dir`` with ``count_files``.

    Args:
        data_dir: Base directory.
        input_dir: Sub-directory of ``data_dir`` that is scanned.
        output_file_name: Name of the output file passed on to ``count_files``.
    """
    target_folder = os.path.join(data_dir, input_dir)
    count_files(folder_path=target_folder, output_file_name=output_file_name)

if __name__ == '__main__':
    import argparse

    # Command-line entry point: both options are mandatory and are passed
    # straight to run().
    cli = argparse.ArgumentParser()
    for option, help_text in (('--data_dir', 'data_dir'), ('--input_dir', 'input_dir')):
        cli.add_argument(option, type=str, default='', required=True, help=help_text)
    parsed = cli.parse_args()

    run(parsed.data_dir, parsed.input_dir)